TODO
s_1ha_qres <- qresid(s_1ha)
par(mfrow = c(2,2))
#histogram
plot(density(s_1ha_qres))
#QQ plot
qqnorm(s_1ha_qres); qqline(s_1ha_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(s_1ha), residuals(s_1ha, type = "response"))
k.check(s_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 2.842177e+00 0.9584417 0.935
## s(spei_history,L) 90 1.077927e+01 NA NA
## s(plot) 4 1.627924e-04 NA NA
Basis dimension checking with gam.check() doesn’t appear to work for dlnm crossbasis smooths. Instead I’ll use a method described in the help file ?choose.k to check for adequate knots. Unfortunately, bs = "cs" doesn’t work with dlnm, so I’ll use select = TRUE instead to reduce chance of overfitting.
check_res_edf(s_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
The crossbasis smooth possibly needs more knots.
# plot_lag_slice(s_1ha, "spei_history", lag = 0) + labs(title = "surv, 1ha")
Here you can see how the CIs hardly overlap 0, but it could be an artifact of the line not being allowed to be wiggly enough.
summary(s_1ha)
##
## Family: binomial
## Link function: logit
##
## Formula:
## surv ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(spei_history,
## L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) + s(plot,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.22347 0.09128 35.312 <2e-16 ***
## flwr_prev1 -0.56668 0.44209 -1.282 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 2.842e+00 9 360.41 <2e-16 ***
## s(spei_history,L) 1.078e+01 22 73.09 <2e-16 ***
## s(plot) 1.628e-04 3 0.00 0.481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0626 Deviance explained = 13.8%
## fREML = 12454 Scale est. = 1 n = 9183
draw(s_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
s_cf_qres <- qresid(s_cf)
par(mfrow = c(2,2))
#histogram
plot(density(s_cf_qres))
#QQ plot
qqnorm(s_cf_qres); qqline(s_cf_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(s_cf), residuals(s_cf, type = "response"))
k.check(s_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 3.457925 0.9099995 0.1925
## s(spei_history,L) 210 12.795976 NA NA
## s(plot) 6 4.336851 NA NA
# looking for near zero edf
check_res_edf(s_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.1
summary(s_cf)
##
## Family: binomial
## Link function: logit
##
## Formula:
## surv ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(spei_history,
## L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) + s(plot,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4656 0.1277 27.147 <2e-16 ***
## flwr_prev1 0.1002 0.2685 0.373 0.709
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 3.458 9 1957.82 <2e-16 ***
## s(spei_history,L) 12.796 20 186.40 <2e-16 ***
## s(plot) 4.337 5 37.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.113 Deviance explained = 20%
## fREML = 43577 Scale est. = 1 n = 31701
draw(s_cf)
## Warning: Removed 939 rows containing non-finite values (stat_contour).
Here I use a random sub-sample to check that differences between continuous forest and fragments are not purely due to sample size differences, particularly differences in the complexity of the crossbasis smooth.
summary(s_1ha)$edf[3]
## [1] 0.0001627924
summary(s_cf)$edf[3]
## [1] 4.336851
summary(s_cf_sub)$edf[3]
## [1] 3.346315
draw(s_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Surface still looks different in a similar way though.
appraise(g_1ha)
k.check(g_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 4.193967 0.9818494 0.1025
## s(spei_history,L) 60 18.181554 NA NA
## s(plot) 4 2.826248 NA NA
check_res_edf(g_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
summary(g_1ha)
##
## Family: Scaled t(4.729,0.477)
## Link function: identity
##
## Formula:
## log_size ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) +
## s(spei_history, L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) +
## s(plot, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.17955 0.08340 50.117 <2e-16 ***
## flwr_prev1 0.08603 0.03601 2.389 0.0169 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(log_size_prev) 4.194 9 3006.20 <2e-16 ***
## s(spei_history,L) 18.182 21 27.04 <2e-16 ***
## s(plot) 2.826 3 23.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.696 Deviance explained = 63.2%
## fREML = 12189 Scale est. = 1 n = 8527
draw(g_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
gratia::appraise(g_cf)
k.check(g_cf)
## k' edf k-index p-value
## s(log_size_prev) 24 9.386988 0.967252 0.015
## s(spei_history,L) 60 15.144505 NA NA
## s(plot) 6 4.080451 NA NA
check_res_edf(g_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
# plot_lag_slice(g_1ha, "spei_history", lag = 33)
hmm… CIs are super narrow here, but effect size is tiny
summary(g_cf)
##
## Family: Scaled t(3.852,0.414)
## Link function: identity
##
## Formula:
## log_size ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) +
## s(spei_history, L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) +
## s(plot, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.36045 0.03401 128.214 <2e-16 ***
## flwr_prev1 0.02856 0.01476 1.934 0.0531 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(log_size_prev) 9.387 24 6717.102 <2e-16 ***
## s(spei_history,L) 15.145 20 95.585 <2e-16 ***
## s(plot) 4.080 5 7.035 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.785 Deviance explained = 69.1%
## fREML = 41001 Scale est. = 1 n = 28849
draw(g_cf)
## Warning: Removed 968 rows containing non-finite values (stat_contour).
To check that differences are not purely due to sample size differences, particularly that lower edf in continuous forests is due to higher sample size.
summary(g_1ha)$edf[3]
## [1] 2.826248
summary(g_cf)$edf[3]
## [1] 4.080451
summary(g_cf_sub)$edf[3]
## [1] 4.82767
edf of subsample is similar to full dataset, despite differences in sample size.
draw(g_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Crossbasis surface looks nearly identical.
f_1ha_qres <- qresid(f_1ha)
par(mfrow = c(2,2))
#histogram
plot(density(f_1ha_qres))
#QQ plot
qqnorm(f_1ha_qres); qqline(f_1ha_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(f_1ha), residuals(f_1ha, type = "response"))
k.check(f_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 3.400302 0.998162 0.96
## s(spei_history,L) 162 14.127281 NA NA
## s(plot) 4 2.514545 NA NA
# looking for near zero edf
check_res_edf(f_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
summary(f_1ha)
##
## Family: binomial
## Link function: logit
##
## Formula:
## flwr ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(spei_history,
## L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) + s(plot,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.2272 0.5207 -11.959 < 2e-16 ***
## flwr_prev1 1.0492 0.1685 6.225 4.8e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 3.400 9 321.86 < 2e-16 ***
## s(spei_history,L) 14.127 26 120.82 < 2e-16 ***
## s(plot) 2.515 3 16.44 0.000106 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.242 Deviance explained = 40%
## fREML = 11617 Scale est. = 1 n = 8527
draw(f_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
f_cf_qres <- qresid(f_cf)
par(mfrow = c(2,2))
#histogram
plot(density(f_cf_qres))
#QQ plot
qqnorm(f_cf_qres); qqline(f_cf_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(f_cf), residuals(f_cf, type = "response"))
k.check(f_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 5.795259 0.9746386 0.71
## s(spei_history,L) 210 11.451012 NA NA
## s(plot) 6 3.950101 NA NA
# looking for near zero edf
check_res_edf(f_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.2
summary(f_cf)
##
## Family: binomial
## Link function: logit
##
## Formula:
## flwr ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(spei_history,
## L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) + s(plot,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.07388 0.22648 -22.40 <2e-16 ***
## flwr_prev1 0.96103 0.08378 11.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 5.795 9 1748.08 <2e-16 ***
## s(spei_history,L) 11.451 20 415.26 <2e-16 ***
## s(plot) 3.950 5 47.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.188 Deviance explained = 32.7%
## fREML = 41028 Scale est. = 1 n = 28849
draw(f_cf)
## Warning: Removed 968 rows containing non-finite values (stat_contour).
To check that differences are not purely due to sample size differences, particularly that lower edf in continuous forests is due to higher sample size. For flowering, edf is actually slightly higher in CF with a larger sample size. With subsample, edf are more similar.
summary(f_1ha)$edf[3]
## [1] 2.514545
summary(f_cf)$edf[3]
## [1] 3.950101
summary(f_cf_sub)$edf[3]
## [1] 4.860281
draw(f_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Crossbasis surface looks extremely similar.
Reproducibility receipt
## datetime
Sys.time()
## [1] "2021-08-24 11:46:23 EDT"
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
## Local: revisions /Users/scottericr/Documents/HeliconiaDemography
## Remote: revisions @ origin (https://github.com/BrunaLab/HeliconiaDemography.git)
## Head: [b1aefac] 2021-08-24: move re smooth to end of formula, just for better printing of summary.
## session info
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices datasets utils
## [8] methods base
##
## other attached packages:
## [1] arm_1.11-2 lme4_1.1-27.1 Matrix_1.3-3 MASS_7.3-54
## [5] qqplotr_0.0.5 Hmisc_4.5-0 Formula_1.2-4 survival_3.2-11
## [9] lattice_0.20-44 readxl_1.3.1 colorspace_2.0-1 rmarkdown_2.7
## [13] statmod_1.4.36 latex2exp_0.5.0 gratia_0.6.0.9112 broom_0.7.6
## [17] patchwork_1.1.1 glue_1.4.2 bbmle_1.0.23.1 dlnm_2.4.5
## [21] mgcv_1.8-36 nlme_3.1-152 lubridate_1.7.10 janitor_2.1.0
## [25] tsModel_0.6 SPEI_1.7 lmomco_2.3.6 tsibble_1.0.1
## [29] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5 purrr_0.3.4
## [33] readr_1.4.0 tidyr_1.1.3 tibble_3.1.1 ggplot2_3.3.5
## [37] tidyverse_1.3.1 here_1.0.1 tarchetypes_0.2.0 targets_0.4.2
## [41] dotenv_1.0.3 conflicted_1.0.4
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 igraph_1.2.6 splines_4.0.2
## [4] digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2
## [7] magrittr_2.0.1 checkmate_2.0.0 memoise_2.0.0
## [10] cluster_2.1.2 modelr_0.1.8 bdsmatrix_1.3-4
## [13] anytime_0.3.9 jpeg_0.1-8.1 rvest_1.0.0
## [16] haven_2.4.1 xfun_0.22 callr_3.7.0
## [19] crayon_1.4.1 jsonlite_1.7.2 gtable_0.3.0
## [22] DEoptimR_1.0-8 abind_1.4-5 scales_1.1.1
## [25] mvtnorm_1.1-1 DBI_1.1.1 Rcpp_1.0.6
## [28] isoband_0.2.4 htmlTable_2.1.0 foreign_0.8-81
## [31] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [34] ellipsis_0.3.2 pkgconfig_2.0.3 farver_2.1.0
## [37] nnet_7.3-16 sass_0.3.1 dbplyr_2.1.1
## [40] utf8_1.2.1 tidyselect_1.1.1 labeling_0.4.2
## [43] rlang_0.4.11 munsell_0.5.0 cellranger_1.1.0
## [46] tools_4.0.2 cachem_1.0.4 cli_2.5.0
## [49] generics_0.1.0 evaluate_0.14 fastmap_1.1.0
## [52] yaml_2.2.1 goftest_1.2-2 processx_3.5.2
## [55] knitr_1.33 fs_1.5.0 robustbase_0.93-7
## [58] mvnfast_0.2.5.1 xml2_1.3.2 compiler_4.0.2
## [61] rstudioapi_0.13 png_0.1-7 reprex_2.0.0
## [64] clustermq_0.8.95.1 bslib_0.2.4 stringi_1.6.2
## [67] highr_0.9 ps_1.6.0 nloptr_1.2.2.2
## [70] vctrs_0.3.8 pillar_1.6.0 lifecycle_1.0.0
## [73] jquerylib_0.1.4 data.table_1.14.0 R6_2.5.0
## [76] latticeExtra_0.6-29 renv_0.13.2 gridExtra_2.3
## [79] Lmoments_1.3-1 codetools_0.2-18 boot_1.3-28
## [82] assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2
## [85] hms_1.1.0 grid_4.0.2 rpart_4.1-15
## [88] coda_0.19-4 minqa_1.2.4 snakecase_0.11.0
## [91] git2r_0.28.0 numDeriv_2016.8-1.1 base64enc_0.1-3